In [ ]:
!git clone https://github.com/mandrakedrink/ChestCTSegmentation

!pip install albumentations==0.4.6
!pip install git+https://github.com/qubvel/segmentation_models.pytorch

import sys

# Add module in system path
sys.path.append("ChestCTSegmentation")
In [2]:
# Mount Google Drive so the dataset archives, the pretrained checkpoint and
# the training logs under /content/drive/My Drive/ct_chest_seg/ are reachable.
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
In [3]:
# Extract CT slice images and segmentation masks from Drive into the Colab
# working directory (/content/images and /content/masks — the same paths
# configured in GlobalConfig further down).
!unzip -q '/content/drive/My Drive/ct_chest_seg/images.zip'
!unzip -q '/content/drive/My Drive/ct_chest_seg/masks.zip'
In [36]:
# Standard library
import time
import os

# Third-party
import torch
import numpy as np
import pandas as pd

# Project modules from the cloned ChestCTSegmentation repo (on sys.path)
from train import Trainer
from loss_metric import BCEDiceLoss

from segmentation_models_pytorch.unet import Unet
from IPython.display import clear_output
import warnings
# NOTE(review): this silences ALL warnings, including deprecations that may
# matter when library versions change — consider narrowing the filter.
warnings.simplefilter("ignore")
In [49]:
class GlobalConfig:
    """Experiment-wide settings: RNG seed plus data / checkpoint paths."""

    def __init__(self):
        drive_root = '/content/drive/My Drive/ct_chest_seg'
        # Reproducibility seed, consumed by seed_everything().
        self.seed = 555
        # CSV describing the ImageId / MaskId pairs of the dataset.
        self.path_to_csv = drive_root + '/train.csv'
        # Unzipped slice images and segmentation masks (see the unzip cell).
        self.path_to_imgs_dir = '/content/images'
        self.path_to_masks_dir = '/content/masks'
        # Checkpoint and CSV history from a previous 100-epoch training run.
        self.pretrained_model_path = drive_root + '/pretrained_model/model_100_epoch.pth'
        self.train_logs_path = drive_root + '/pretrained_model/train_log_100_epoch.csv'


def seed_everything(seed: int) -> None:
    """Seed every RNG used in this notebook for reproducibility.

    Seeds Python's ``random``, NumPy's legacy global RNG, torch's CPU RNG
    and ALL CUDA devices. The original version skipped ``random`` entirely
    and seeded only the current CUDA device (``manual_seed`` vs
    ``manual_seed_all``).

    Args:
        seed: the seed value applied to every generator.
    """
    import random  # local import keeps the notebook's top import cell untouched

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # safe no-op when CUDA is unavailable
    
# Build the config and fix all RNG seeds before any model / data work.
config = GlobalConfig()
seed_everything(config.seed)
In [50]:
# U-Net with an ImageNet-pretrained EfficientNet-B2 encoder and 3 output
# channels (lung / heart / trachea). activation=None -> raw logits;
# presumably BCEDiceLoss applies the sigmoid — confirm in loss_metric.py.
model = Unet('efficientnet-b2', encoder_weights="imagenet", classes=3, activation=None)

Train Process¶

In [51]:
# Trainer comes from the cloned repo (train.py); it builds its own
# dataloaders from the csv / image dirs below.
trainer = Trainer(net=model,
                  criterion=BCEDiceLoss(),
                  lr=8e-5,
                  accumulation_steps=32,
                  batch_size=8,
                  num_epochs=1,
                  imgs_dir=config.path_to_imgs_dir,
                  masks_dir=config.path_to_masks_dir,
                  path_to_csv=config.path_to_csv)

if config.pretrained_model_path is not None:
    trainer.load_predtrain_model(config.pretrained_model_path)

    # Restore the training history so new epochs extend the existing curves.
    # One loop replaces the six copy-pasted assignment lines: each trainer
    # attribute is a {"train": [...], "val": [...]} dict fed from the
    # matching "<phase>_<metric>" column of the log CSV.
    train_logs = pd.read_csv(config.train_logs_path)
    for attr, metric in (("losses", "loss"),
                         ("dice_scores", "dice"),
                         ("jaccard_scores", "jaccard")):
        history = getattr(trainer, attr)
        for phase in ("train", "val"):
            history[phase] = train_logs.loc[:, f"{phase}_{metric}"].to_list()
device: cuda
Predtrain model loaded
In [52]:
%%time
# Fine-tune for num_epochs more epoch(s); the Trainer prints
# "Saved new checkpoint" when the validation metric improves.
trainer.train()
####################
Saved new checkpoint
####################


CPU times: user 35min 19s, sys: 23min 22s, total: 58min 42s
Wall time: 57min 58s

Experiments and Results¶

In [15]:
from dataset_dataloader  import get_dataloader
from utils import compute_scores_per_classes

import matplotlib.pyplot as plt
import seaborn as sns
In [16]:
# Validation loader built from the same CSV split used during training;
# phase="val" with test_size=0.2 selects the held-out 20% of slices.
val_dataloader = get_dataloader(
    imgs_dir=config.path_to_imgs_dir,
    masks_dir=config.path_to_masks_dir,
    path_to_csv=config.path_to_csv,
    phase="val",
    batch_size=8,
    num_workers=6,
    test_size=0.2,
)
In [17]:
%%time
# Per-slice Dice and IoU for each class over the whole validation set;
# returns two dicts keyed by the class names given here.
dice_scores_per_classes, iou_scores_per_classes = compute_scores_per_classes(
    model, val_dataloader, ['lung', 'heart', 'trachea']
    )
CPU times: user 3min 2s, sys: 1min 30s, total: 4min 33s
Wall time: 4min 25s
In [18]:
# Per-slice score tables; the positional column renames assume the dicts
# preserve the class order ['lung', 'heart', 'trachea'] used above.
dice_df = pd.DataFrame(dice_scores_per_classes)
dice_df.columns = ['lung dice', 'heart dice', 'trachea dice']

iou_df = pd.DataFrame(iou_scores_per_classes)
iou_df.columns = ['lung jaccard', 'heart jaccard', 'trachea jaccard']

# NOTE(review): "metics" is a typo for "metrics", but later cells reference
# this exact name, so it is kept unchanged.
ordered_columns = ['lung dice', 'lung jaccard',
                   'heart dice', 'heart jaccard',
                   'trachea dice', 'trachea jaccard']
val_metics_df = pd.concat([dice_df, iou_df], axis=1, sort=True)[ordered_columns]
val_metics_df
Out[18]:
lung dice lung jaccard heart dice heart jaccard trachea dice trachea jaccard
0 1.000000 1.000000 1.0 1.0 0.930952 0.870824
1 0.994966 0.989983 1.0 1.0 0.970213 0.942149
2 0.996681 0.993383 1.0 1.0 0.984456 0.969388
3 0.994227 0.988521 1.0 1.0 1.000000 1.000000
4 0.996206 0.992440 1.0 1.0 0.991160 0.982475
... ... ... ... ... ... ...
3337 0.625000 0.454545 1.0 1.0 1.000000 1.000000
3338 0.994015 0.988101 1.0 1.0 0.963835 0.930195
3339 0.988460 0.977183 1.0 1.0 0.950119 0.904977
3340 0.995173 0.990393 1.0 1.0 0.924574 0.859729
3341 0.942254 0.890813 1.0 1.0 1.000000 1.000000

3342 rows × 6 columns

In [19]:
colors = ['#35FCFF', '#FF355A', '#96C503', '#C5035B', '#28B463', '#35FFAF']
palette = sns.color_palette(colors, 6)

# Mean Dice / Jaccard per class, annotated as a percentage above each bar.
mean_scores = val_metics_df.mean()

fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x=mean_scores.index, y=mean_scores, palette=palette, ax=ax)
ax.set_xticklabels(val_metics_df.columns, fontsize=14, rotation=15)
ax.set_title("Dice and Jaccard Coefficients from Validation", fontsize=20)

for idx, patch in enumerate(ax.patches):
    label = '{:.1f}%'.format(100 * mean_scores.values[idx])
    x_pos = patch.get_x() + patch.get_width() / 2 - 0.15
    y_pos = patch.get_y() + patch.get_height()
    ax.annotate(label, (x_pos, y_pos), fontsize=15, fontweight="bold")

fig.savefig("result1.png", format="png", pad_inches=0.2, transparent=False, bbox_inches='tight')
fig.savefig("result1.svg", format="svg", pad_inches=0.2, transparent=False, bbox_inches='tight')
In [20]:
colors = ['#35FCFF', '#FF355A', '#28B463', '#35FFAF', '#96C503', '#C5035B']
palette = sns.color_palette(colors[1::], 3)

# Side-by-side box plots: per-class Dice (left) and Jaccard (right)
# distributions over all validation slices.
fig, axes = plt.subplots(1, 2, figsize=(20, 5))

for axis, frame, metric in [(axes[0], dice_df, "Dice"),
                            (axes[1], iou_df, "Jaccard")]:
    sns.boxplot(data=frame, palette=palette, ax=axis)
    axis.set_ylabel(f"{metric} coefficients for 3342 slices", fontsize=14)
    axis.set_title(f"{metric} coefficients from Validation", fontsize=20)
    axis.set_xticklabels(frame.columns, fontsize=14)

plt.tight_layout()

fig.savefig("result2.png", format="png", pad_inches=0.2, transparent=False, bbox_inches='tight')
fig.savefig("result2.svg", format="svg", pad_inches=0.2, transparent=False, bbox_inches='tight')

And the training history logs:

In [21]:
# Reload the persisted per-epoch history (loss / dice / jaccard for both
# train and val phases) for the plots below.
train_logs = pd.read_csv(config.train_logs_path)
train_logs.head(5)
Out[21]:
train_loss val_loss train_dice val_dice train_jaccard val_jaccard
0 0.386394 0.176678 0.697667 0.755589 0.646303 0.717975
1 0.144522 0.123104 0.769935 0.786307 0.731782 0.752826
2 0.112448 0.093943 0.811846 0.829476 0.775296 0.795960
3 0.087972 0.073344 0.838840 0.851835 0.804823 0.819167
4 0.073297 0.064866 0.854065 0.861600 0.821115 0.830928
In [22]:
colors = ['#C042FF', '#03C576FF', '#FF355A', '#03C5BF', '#96C503', '#C5035B']
palettes = [sns.color_palette(colors, 2),
            sns.color_palette(colors, 4),
            sns.color_palette(colors[:2] + colors[-2:] + colors[2:-2], 6)]

# Three panels: loss curves, metric curves, per-class validation box plots.
fig, (loss_ax, metric_ax, box_ax) = plt.subplots(1, 3, figsize=(21, 4))

# Columns 0-1 of the log are train_loss / val_loss.
sns.lineplot(data=train_logs.iloc[:, :2], palette=palettes[0], markers=True, ax=loss_ax, linewidth=2.5)
loss_ax.set_title("Loss Function during Model Training", fontsize=14)
loss_ax.set_xlabel("Epoch", fontsize=14)

# Columns 2+ hold the dice / jaccard histories for both phases.
sns.lineplot(data=train_logs.iloc[:, 2:], palette=palettes[1], markers=True, ax=metric_ax, linewidth=2.5, legend="full")
metric_ax.set_title("Dice and Jaccard Coefficients during Model Training", fontsize=14)
metric_ax.set_xlabel("Epoch", fontsize=14)

sns.boxplot(data=val_metics_df, palette=palettes[2], ax=box_ax)
box_ax.set_title("Dice and Jaccard Coefficients for each Label from Validation", fontsize=14)
box_ax.set_xticklabels(val_metics_df.columns, fontsize=10, rotation=15)

plt.tight_layout()
fig.savefig("result3.png", format="png", pad_inches=0.2, transparent=False, bbox_inches='tight')
fig.savefig("result3.svg", format="svg", pad_inches=0.2, transparent=False, bbox_inches='tight')

Now let's make a video with the masks overlaid on every slice of a single patient's CT scan.

In [23]:
from utils import (get_one_slice_data, 
                   get_id_predictions, 
                   get_overlaid_masks_on_image,
                   get_overlaid_masks_on_full_ctscan,
                   create_video)
from visualizer import show_video
In [24]:
df = pd.read_csv(config.path_to_csv)
# Patient id is the file-name prefix before the first underscore, e.g.
# "ID00400637202305055099402_0.jpg" -> "ID00400637202305055099402".
# Vectorized .str accessor replaces the Python-level apply/lambda.
df["Id"] = df['ImageId'].str.split("_").str[0]

# All slices belonging to one CT scan, used below for the overlay videos.
id_ = 'ID00400637202305055099402'
full_scan_example = df.loc[df['Id'] == id_].reset_index(drop=True)
full_scan_example
Out[24]:
ImageId MaskId Id
0 ID00400637202305055099402_0.jpg ID00400637202305055099402_mask_0.jpg ID00400637202305055099402
1 ID00400637202305055099402_1.jpg ID00400637202305055099402_mask_1.jpg ID00400637202305055099402
2 ID00400637202305055099402_2.jpg ID00400637202305055099402_mask_2.jpg ID00400637202305055099402
3 ID00400637202305055099402_3.jpg ID00400637202305055099402_mask_3.jpg ID00400637202305055099402
4 ID00400637202305055099402_4.jpg ID00400637202305055099402_mask_4.jpg ID00400637202305055099402
... ... ... ...
260 ID00400637202305055099402_260.jpg ID00400637202305055099402_mask_260.jpg ID00400637202305055099402
261 ID00400637202305055099402_261.jpg ID00400637202305055099402_mask_261.jpg ID00400637202305055099402
262 ID00400637202305055099402_262.jpg ID00400637202305055099402_mask_262.jpg ID00400637202305055099402
263 ID00400637202305055099402_263.jpg ID00400637202305055099402_mask_263.jpg ID00400637202305055099402
264 ID00400637202305055099402_264.jpg ID00400637202305055099402_mask_264.jpg ID00400637202305055099402

265 rows × 3 columns

Ground Truth¶

In [38]:
# Render the ground-truth masks overlaid on every slice of the chosen scan;
# one image per slice is written into this folder.
PATH_TO_SAVE = id_ + "_ground_truth"

if not os.path.exists(PATH_TO_SAVE):
    os.mkdir(PATH_TO_SAVE)
    print(f"Folder {PATH_TO_SAVE} created.")

get_overlaid_masks_on_full_ctscan(ct_scan_id_df=full_scan_example,
                                  path_to_save=PATH_TO_SAVE)
In [26]:
%%time
# Stitch the per-slice ground-truth overlays into an mp4 at 30 fps.
create_video(path_to_imgs=PATH_TO_SAVE, video_name=id_+"_ground_truth", framerate=30)
CPU times: user 2.68 s, sys: 57.7 ms, total: 2.73 s
Wall time: 2.94 s
In [ ]:
# Re-encode so the video plays inside Google Colab; drawtext also burns a
# "Ground Truth" caption into the frames.
#https://ottverse.com/ffmpeg-drawtext-filter-dynamic-overlays-timecode-scrolling-text-credits/
!ffmpeg -i 'ID00400637202305055099402_ground_truth.mp4' -vf "drawtext=text='Ground Truth':x=195:y=8:fontsize=24:fontcolor='#FFFFFF'" -strict -2 'transcoded_video1.mp4'
In [28]:
# Inline player for the ground-truth video.
show_video("transcoded_video1.mp4")
Out[28]:

Prediction¶

In [29]:
# Run the model over every slice of the selected scan; returns the input
# slice images together with the per-slice predicted masks.
imgs, predictions = get_id_predictions(net=model,
                                       ct_scan_id_df=full_scan_example,
                                       root_imgs_dir=config.path_to_imgs_dir)
device: cuda
In [37]:
%%time
PATH_TO_SAVE = id_ + "_predictions"

if not os.path.exists(PATH_TO_SAVE):
    os.mkdir(PATH_TO_SAVE)
    print(f"Folder {PATH_TO_SAVE} created.")

_= [
    get_overlaid_masks_on_image(one_slice_image=image,
                                one_slice_mask=mask, 
                                write=True,
                                path_to_save=PATH_TO_SAVE,
                                name_to_save= str(i_name)
                                ) 
    for i_name, (image, mask) in enumerate(zip(imgs, predictions))
    ]
CPU times: user 1min 21s, sys: 812 ms, total: 1min 22s
Wall time: 1min 22s
In [31]:
%%time
# Stitch the prediction overlays into an mp4 at 30 fps.
create_video(path_to_imgs=PATH_TO_SAVE, video_name=id_+"_predictions", framerate=30)
CPU times: user 2.74 s, sys: 63.9 ms, total: 2.81 s
Wall time: 2.81 s
In [ ]:
# Re-encode for Colab playback and burn in a "Prediction" caption.
!ffmpeg -i 'ID00400637202305055099402_predictions.mp4' -vf "drawtext=text='Prediction':x=195:y=8:fontsize=24:fontcolor='#FFFFFF'" -strict -2 'transcoded_video2.mp4'
In [33]:
# Inline player for the prediction video.
show_video("transcoded_video2.mp4")
Out[33]:

Merging the ground-truth video and the prediction video into one side-by-side clip.

In [ ]:
%%shell
#  https://unix.stackexchange.com/questions/233832/merge-two-video-clips-into-one-placing-them-next-to-each-other
# Pad the ground-truth video to double width, then overlay the prediction
# video on the right half -> one side-by-side comparison clip (result.mp4).
ffmpeg \
  -i transcoded_video1.mp4 \
  -i transcoded_video2.mp4 \
  -filter_complex '[0:v]pad=iw*2:ih[int];[int][1:v]overlay=W/2:0[vid]' \
  -map [vid] \
  -c:v libx264 \
  -crf 23 \
  -preset veryfast \
  result.mp4
In [35]:
# Inline player for the combined side-by-side video.
show_video("result.mp4")
Out[35]: